
# A2_BLP_sub
#==============================================================================

# Description: This code estimates the demand side parameters for the case in 
# which observations with small shares are deleted. # The code builds on Dube, 
# Fox, and Su's (2012) code and is therefore very similar. The main differences 
# are: (1) allowing for parameters to vary with income and (2) it does not use 
# synthetic data


# Specify Predict category and which initial guess to be used
#-------------------------------------------------------------------------------

i_group = 1     # Specifies which estimation file is used among data_summary_full_`i_group'
kk = 10          # Product Category
reps = 3        # Initial Guess


# Load packages and source functions that are used in the script
#-------------------------------------------------------------------------------

library("Matrix")
library("ipoptr")  
library("data.table")  
library("Rcpp")  
library("RcppArmadillo") 

Rcpp::sourceCpp("MPEC_C_codes.cpp")

source("mksharesim_FE.R")
source("invertshares_FE.R")
source("ind_shnorm.R")
source("GMMMPEC_f_ktr_FE.R")
source("GMMMPEC_f_grad_ktr_FE.R")
source("GMMMPEC_c_ktr_FE.R")
source("GMMMPEC_dc_ktr_FE.R")
source("GMMMPEC_hess_ktr_FE.R")
source("ind_shnormMPEC.R")
source("jacobian.R")


# Import and format data
#-------------------------------------------------------------------------------

group_name <- sprintf('data_summary_full_%s.RData', i_group)
load(group_name)
load('income_mu_sigma.RData')
data = data_summary[[kk]]

data <- as.data.table(data)
data <- data[share > 0.0015]                                                     # Require Minimum Share
#data <- data[share > 0.0017]                                                     # Require Minimum Share
data <- data[year < 2006]

if (nrow(data) > 0) {

data[ , share_total := sum(share), by=c("year", "quarter", "declarant")]
data$share <- data$share / data$share_total                                      # Adjust Shares for deleted Observations
data[ , products := .N, by=c("year", "quarter", "declarant")]
data <- data[products > 1]

data <- transform(data,id=as.numeric(factor(data$market_id)))
data$V18 = data$id
data$id = NULL
market_id = as.matrix(data$V18)

gdp_per_capita_log = as.matrix(log(data$gdp_per_capita_defl_partner))
data$gdp_per_capita_defl_partner = data$gdp_per_capita_defl_partner/1000
distance_log = as.matrix(log(data$distance))
data$distance=data$distance/1000
declarant = as.matrix(data$declarant)
partner = as.matrix(data$partner)
year = as.matrix(data$year)
quarter = as.matrix(data$quarter)
population = as.matrix(data$population_partner)
population_log = log(population)

nn <- 100                            
tol_inner<-1.e-14
tol_outer<-1.e-6
prods = max(data$products)
T = market_id[nrow(market_id),1]

starts <- 5

costparams<-matrix(0.5,6,1)
betatrue<-c(2, 1.5, 0.5, -2)
betatrue = as.matrix(betatrue)
betatrue0=betatrue
K_A<-nrow(betatrue)-1


prodsMarket = matrix(0,T,1)
prodsMarket_temp = as.matrix(data$products)
for (t in 1:T) {
  prodsMarket_temp2 = as.matrix(prodsMarket_temp[market_id==t,1])
  prodsMarket[t,1] = prodsMarket_temp2[1,1]
}


marketStarts<-matrix(0,T,1)
marketEnds<-matrix(0,T,1)
marketStarts[1]=1
marketEnds[1]=prodsMarket[1]
if (T > 1) {
for (t in 2:T) {
  marketStarts[t]=marketEnds[t-1]+1
  marketEnds[t]=marketStarts[t]+prodsMarket[t]-1
}
numProdsTotal=marketEnds[T];  			   # Total number of products in all markets


# Specify number of fixed effects
#-------------------------------------------------------------------------------

countries = 5      # Baseline
FE = matrix(0,nrow(data),countries-1)
gdp_capita = as.matrix(data$gdp_per_capita_defl_partner)
q_temp_old = quantile(gdp_capita,1/countries)
FE[,1] = (gdp_capita<q_temp_old)
for (i_q in 2:(countries-1)) {
  q_temp = quantile(gdp_capita,i_q/countries)
  FE[,i_q] = (q_temp_old<=gdp_capita) & (gdp_capita<q_temp)
  q_temp_old = q_temp
}

K = K_A + (countries-1)


# Create Income Draws and Format
#-------------------------------------------------------------------------------

set.seed(20)
Y = array(0, dim=c(K+1,nn,T))
incomedraw_declarant = as.matrix(income_mu_sigma[,1])
incomedraw_year = as.matrix(income_mu_sigma[,2])
mu = as.matrix(income_mu_sigma[,3])
sigma = as.matrix(income_mu_sigma[,4])
for (t in 1:T) {
  index_market = market_id == t  
  declarant_temp = as.matrix(declarant[index_market])
  declarant_temp = declarant_temp[1,1]
  year_temp = as.matrix(year[index_market])
  year_temp = year_temp[1,1]
  index_mu_sigma = incomedraw_declarant==declarant_temp & incomedraw_year==year_temp
  mu_temp = mu[index_mu_sigma]
  sigma_temp = sigma[index_mu_sigma]
  Y_draw = matrix(rlnorm(nn,mu_temp,sigma_temp),1,nn)
  Y[,,t] = t(matrix(rep(Y_draw,K+1),nn,K+1))  
  #Y[,,t] = t(matrix(rep(mean(Y_draw),nn*(K+1)),nn,K+1))     #Choose this option for representative nonhomothetic agent
}
Y = Y/10000
Y = log(Y)


v <- matrix(0,K+1,nn)
v <- as.matrix(v)


set.seed(17)
v_array = array(0, dim=c(2*(K+1),nn,T))
Y_save = matrix(0,T,nn)
for (i_Y in 1:T) {
  v_array[1:(K+1),,i_Y] = v 
  v_array[(K+2):(2*(K+1)),,i_Y] = Y[,,i_Y]
  Y_save[i_Y,] = Y[1,,i_Y]
}
v_save = v
v = v_array


nD = nrow(v)/(K+1)
sigmaxi<-0.01


# Create indices to speed share calculations (following Dube, Fox, and Su (2012))
#-------------------------------------------------------------------------------

oo<-matrix(1,1,nn)
sharesum<-matrix(0,T,numProdsTotal)
for (t in 1:T) {
  sharesum[t,marketStarts[t]:marketEnds[t]]=1
}
sharesum<-Matrix(sharesum, sparse=TRUE)

marketForProducts<-matrix(0,numProdsTotal,1)       # market indices for each product
for (t in 1:T) {
  marketForProducts[marketStarts[t]:marketEnds[t]]=t
}


# Select regressors and instrument
#-------------------------------------------------------------------------------

A_IV <- cbind(data$gdp_per_capita_defl_partner, data$distance, population_log)
A <- cbind(data$distance, population_log) # Baseline

price <- as.matrix(data$price_pc8plus)
price <- price + 1
price = log(price)

z <- cbind(data$iv_feenstra, data$iv_unit * 1000, data$D_exchange_rate)

x_char <- cbind(matrix(1, numProdsTotal, 1), A, price, FE)


# Shares and Outside Option
share <- as.matrix(data$share)
share = 0.98 * share
nopurch = matrix(0.02, numProdsTotal, 1)
y<-log(share)-log(nopurch) 

iv = cbind(matrix(1,numProdsTotal,1), A_IV, z, A_IV^2, A_IV^3, z^2, z^3)
iv_alt = cbind(matrix(1,numProdsTotal,1), A_IV, z, A_IV^2, z^2)

rec_number = rcond(t(iv)%*%iv)
if (rec_number<1e-15) {
  iv = iv_alt
}

IV<-iv


# Obtain Initial Guess
#===============================================================================

t_error <- try(iv%*%solve(t(iv)%*%iv)%*%t(iv), silent = TRUE)
if(("try-error" %in% class(t_error)) == FALSE) {

pz<-iv%*%solve(t(iv)%*%iv)%*%t(iv)
xhat<-pz%*%x_char
PX2sls<-solve(t(xhat)%*%xhat)%*%t(xhat)              #project. matrix on weighted x-space for 2SLS
beta2sls<-PX2sls%*%y
se2sls = sqrt(diag( mean((y-x_char%*%beta2sls)^2)*solve(t(x_char)%*%pz%*%x_char)))


expmeanval0<-exp(y)		     						# starting value for mean values 
W<-solve(t(IV)%*%IV)		     						# GMM weighting matrix
PX<-solve(t(x_char)%*%IV%*%W%*%t(IV)%*%x_char)%*%t(x_char)%*%IV%*%W%*%t(IV) 	# Used to calculate starting values


theta20 <- matrix(0,nD*(K_A+1) + (countries-1) + nD,1)
theta20[(nD*(K_A+1)+1):(nD*(K_A+1)+countries-1),1] = beta2sls[(K_A+2):(K_A+1+(countries-1))]

set.seed(10)
startvalues<-t(matrix(rep(theta20,starts), ncol = starts))*t(cbind(matrix(1,length(theta20),1),t(matrix(runif((starts-1)*length(theta20)),(starts-1),length(theta20))))) 
startvalues <- as.matrix(startvalues)

startvalues[,8] = c(-0.2, -0.1, 0, 0.1, 0.2)
theta20[8,1] = startvalues[reps,8]

nIV=ncol(IV)			# Number instrumental variables


# MPEC ESTIMATION OF RANDOM COEFFICIENTS LOGIT MODEL 
#===============================================================================

GMPEC<-1.0e20
CPUtMPEC<-0
FuncEvalMPEC<-0
GradEvalMPEC<-0
HessEvalMPEC<-0
expmeanval<-expmeanval0

X_MPEC <- matrix(0, (K_A+1) + nrow(theta20) + numProdsTotal + nIV, 1)
objective_MPEC <- 0
status_MPEC <- 999

theta20<-startvalues[reps,]
delta0 <- invertshares_FE(theta20, x_char, expmeanval, tol_inner, share, v, Y, oo, sharesum, marketForProducts, marketStarts, marketEnds, K, K_A, nD, T, nn, countries)      # starting values for mean utilities using BLP inversion


theta20<-as.matrix(theta20)    
theta10<-PX%*%delta0  					# starting values for linear parameters using IV like regression of delta on covariates
theta10 = theta10[1:(K_A+1)]
theta10<-as.matrix(theta10)
resid0<-delta0-x_char[,1:(K_A+1)]%*%theta10[1:(K_A+1),1] 				# starting values for xi terms, not directly used
resid0<-as.matrix(resid0)	
g0<-t(IV)%*%resid0  					# starting values for GMM moments
g0<-as.matrix(g0)
x0<-rbind(theta10,theta20,delta0,g0)		# master starting values

# BOUNDS    
x_L<-(-Inf)*matrix(1,length(x0),1)     	# Lower bounds for x.
x_U<-(Inf)*matrix(1,length(x0),1)   	# Upper bounds for x.

nx0<-length(x0)					      # number of MPEC optimization parameters


# Parameter Restrictions:
#-------------------------------------------------------------------------------

# The code in principle allows for more complex interactions between coefficients, country FEs, and demographics
# -> Here: Choose a specification with price-income interactions and 4 country FEs

yyy=1     # Set which regressors correlate with income (turn off if yyy = 0, price-income interactions: yyy = 1)
vvv=0     # Permit random effects (turn off if vvv = 0)
bbb=4     # Number of country-group fixed effects (countries - 1)
sss=0     # Permit interactions between fixed effects and random draws (turn off if sss = 0)


x_U[((K_A+1)+1):(2*(K_A+1)),1] = rbind(matrix(Inf,vvv,1), matrix(0,K_A+1-vvv,1))
x_L[((K_A+1)+1):(2*(K_A+1)),1] = rbind(matrix(Inf,vvv,1), matrix(0,K_A+1-vvv,1)) 
x0[((K_A+1)+1):(2*(K_A+1)),1] = as.matrix(x0[((K_A+1)+1):(2*(K_A+1)),1])*rbind(matrix(1,vvv,1), matrix(0,K_A+1-vvv,1))

x_L[(2*(K_A+1)+1):(3*(K_A+1)),1] = rbind(matrix(0,K_A+1-yyy,1), matrix(-Inf,yyy,1))
x_U[(2*(K_A+1)+1):(3*(K_A+1)),1] = rbind(matrix(0,K_A+1-yyy,1), matrix(Inf,yyy,1))
x0[(2*(K_A+1)+1):(3*(K_A+1)),1] = as.matrix(x0[(2*(K_A+1)+1):(3*(K_A+1)),1])*rbind(matrix(0,K_A+1-yyy,1), matrix(1,yyy,1))

x_L[(3*(K_A+1)+1):(3*(K_A+1) + (countries-1)),1] = rbind(matrix(-Inf,bbb,1), matrix(0,(countries-1)-bbb,1))
x_U[(3*(K_A+1)+1):(3*(K_A+1) + (countries-1)),1] = rbind(matrix(Inf,bbb,1), matrix(0,(countries-1)-bbb,1))
x0[(3*(K_A+1)+1):(3*(K_A+1) + (countries-1)),1] = as.matrix(x0[(3*(K_A+1)+1):(3*(K_A+1) + (countries-1)),1])*rbind(matrix(1,bbb,1), matrix(0,(countries-1)-bbb,1))

x_L[(3*(K_A+1) + (countries-1) +1):(3*(K_A+1) + (countries-1) +nD),1] = rbind(matrix(-Inf,sss,1), matrix(0,nD-sss,1))
x_U[(3*(K_A+1) + (countries-1) +1):(3*(K_A+1) + (countries-1) +nD),1] = rbind(matrix(Inf,sss,1), matrix(0,nD-sss,1))
x0[(3*(K_A+1) + (countries-1) +1):(3*(K_A+1) + (countries-1) +nD),1] = as.matrix(x0[(3*(K_A+1) + (countries-1) +1):(3*(K_A+1) + (countries-1) +nD),1]) * rbind(matrix(1,sss,1), matrix(0,nD-sss,1))


# Sparsity Patterns of Constraints, 0 if derivative, 1 otherwise
#-------------------------------------------------------------------------------

# Derivatives of market Shares

c11<-matrix(0,numProdsTotal,K_A+1)		
c12<-matrix(1,numProdsTotal, nD*(K_A+1)) 		 	

c13 = matrix(1, numProdsTotal, countries-1)
c14 = matrix(1, numProdsTotal, nD)

c15<-matrix(0,numProdsTotal,numProdsTotal)	
for (t in 1:T)  {
  c15[marketStarts[t]:marketEnds[t],marketStarts[t]:marketEnds[t]]=1
}

c16<-matrix(0,numProdsTotal,nIV)		

# Derivatives of moments

c21<-matrix(1,nIV, K_A+1)					# moments with respect to mean parameters 
c22<-matrix(0,nIV, nD*(K_A+1) + (countries-1) + nD)				      # moments with respect to standard deviations
c23<-matrix(1,nIV, numProdsTotal)		      # moments with respect to mean utilities
c24<-diag(nIV)		    				# moments with respect to moment values 

ConsPattern1<-cbind(c11,c12,c13,c14,c15,c16)
ConsPattern2<-cbind(c21,c22,c23,c24)
ConsPattern<-rbind(ConsPattern1,ConsPattern2)

ConsPattern_true<-ConsPattern


# Specify Hessian Sparsity Pattern
#-------------------------------------------------------------------------------

HessianPattern<-matrix(0,nx0,nx0)

HessianPattern[((nD+1)*(K_A+1) + (countries-1) + nD + numProdsTotal+1):nrow(HessianPattern), ((nD+1)*(K_A+1) + (countries-1) + nD + numProdsTotal+1):ncol(HessianPattern)] = matrix(1, nIV, nIV)

# Hessian for theta2, theta3, theta4 squared
for (dd in 2:(nD+1)) {
  for (ddd in 2:(nD+1)) {    
    HessianPattern[((dd-1)*(K_A+1)+1):(dd*(K_A+1)),((ddd-1)*(K_A+1)+1):(ddd*(K_A+1))] = matrix(1, K_A+1, K_A+1) 
  } 
}

# Hessian for delta and theta2, theta3, theta4
for (dd in 1:nD) {
  HessianPattern[((nD+1)*(K_A+1) + (countries-1) + nD +1):((nD+1)*(K_A+1) + (countries-1) + nD +numProdsTotal), (dd*(K_A+1)+1):((dd+1)*(K_A+1))] = matrix(1, numProdsTotal, K_A+1)
  HessianPattern[(dd*(K_A+1)+1):((dd+1)*(K_A+1)), ((nD+1)*(K_A+1)+(countries-1)+nD +1):((nD+1)*(K_A+1)+(countries-1)+nD + numProdsTotal)] = t(matrix(1, numProdsTotal, K_A+1))
}

# Hessian for betaf
HessianPattern[((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1)), ((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1))] = matrix(1,countries-1,countries-1)                                             #dL2dbetaf2              
HessianPattern[((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1)), (K_A+2):((nD+1)*(K_A+1))] = matrix(1,countries-1,nD*(K_A+1))                                                                       #dL2dbetafdtheta2                                          
HessianPattern[(K_A+2):((nD+1)*(K_A+1)),((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1))] = matrix(1,nD*(K_A+1),countries-1)                                                                        #dL2dbetafdtheta2'                       
HessianPattern[((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1)), ((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD)] = matrix(1,countries-1,nD);                                    #dL2dbetafdsigmaf    
HessianPattern[((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD), ((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1))] = matrix(1,nD,countries-1);                                    #dL2dbetafdsigmaf'   
HessianPattern[((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1)), ((nD+1)*(K_A+1)+(countries-1)+nD +1):((nD+1)*(K_A+1)+(countries-1)+nD + numProdsTotal)] = matrix(1,countries-1,numProdsTotal);     #dL2ddeltadbetaf';     
HessianPattern[((nD+1)*(K_A+1)+(countries-1)+nD +1):((nD+1)*(K_A+1)+(countries-1)+nD + numProdsTotal), ((nD+1)*(K_A+1)+1):((nD+1)*(K_A+1)+(countries-1))] = matrix(1,numProdsTotal, countries-1);    #dL2ddeltadbetaf;    


# Hessian for sigmaf
HessianPattern[((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD), ((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD)] = matrix(1,nD,nD)                                       #dL2dsigmaf2;    
HessianPattern[((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD), (K_A+2):((nD+1)*(K_A+1))] = matrix(1,nD,nD*(K_A+1))                                                                         #dL2dtheta2dsigmaf;             
HessianPattern[(K_A+2):((nD+1)*(K_A+1)), ((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD)] = matrix(1,nD*(K_A+1),nD)                                                                         #dL2dtheta2dsigmaf';               
HessianPattern[((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD), ((nD+1)*(K_A+1)+(countries-1)+nD +1):((nD+1)*(K_A+1)+(countries-1)+nD + numProdsTotal)] = matrix(1,nD, numProdsTotal)       #dL2ddeltadsigmaf'; 
HessianPattern[((nD+1)*(K_A+1)+(countries-1)+nD +1):((nD+1)*(K_A+1)+(countries-1)+nD + numProdsTotal), ((nD+1)*(K_A+1)+(countries-1)+1):((nD+1)*(K_A+1)+(countries-1)+nD)] = matrix(1,numProdsTotal, nD)       #dL2ddeltadsigmaf;  


for (t in 1:T) {
  range <- marketStarts[t]:marketEnds[t]
  index = (nD+1)*(K_A+1) + (countries-1) + nD + range
  HessianPattern[index,index]=matrix(1,prodsMarket[t],prodsMarket[t])
}



# OPTIONS
opts <- list("tol"=1.0e-6, "max_iter"=150, "linear_solver"="ma57", "jac_c_constant"="no", "hessian_constant"="no", "mu_strategy" = "adaptive")


nr=nrow(ConsPattern_true)
nc=ncol(ConsPattern_true)

constraint_lb=matrix(0,nr,1)
constraint_lb<-as.numeric(constraint_lb)
constraint_ub=matrix(0,nr,1)
constraint_ub<-as.numeric(constraint_ub)



ConsPattern<-make.sparse(ConsPattern_true)

HessianPattern_tri = HessianPattern
HessianPattern_tri[upper.tri(HessianPattern_tri)]=0


HessianPattern_true<-HessianPattern_tri
HessianPattern_tri<-make.sparse(HessianPattern_tri)

index_Jacobian=(ConsPattern_true==1)
index_Hessian<-t(HessianPattern_true==1)


# Estimation
#-------------------------------------------------------------------------------

auxdata<-new.env()
auxdata$W<-W
auxdata$K<-K
auxdata$nD<-nD
auxdata$prods<-prods
auxdata$T<-T
auxdata$numProdsTotal<-numProdsTotal
auxdata$x_char<-x_char
auxdata$IV<-IV
auxdata$v<-v
auxdata$nn<-nn
auxdata$share<-share
auxdata$marketStarts<-marketStarts
auxdata$marketEnds<-marketEnds
auxdata$oo<-oo
auxdata$sharesum<-sharesum
auxdata$marketForProducts<-marketForProducts
auxdata$delta<-delta0
auxdata$index_Jacobian<-index_Jacobian
auxdata$index_Hessian<-index_Hessian
auxdata$Y<-Y
auxdata$K_A<-K_A
auxdata$countries<-countries

ipopt_output <- ipoptr(x0=x0, eval_f=GMMMPEC_f_ktr_FE, eval_grad_f=GMMMPEC_f_grad_ktr_FE, lb=x_L, ub=x_U, eval_g=GMMMPEC_c_ktr_FE, eval_jac_g=GMMMPEC_dc_ktr_FE, eval_jac_g_structure=ConsPattern, constraint_lb = constraint_lb, constraint_ub = constraint_ub, eval_h=GMMMPEC_hess_ktr_FE, eval_h_structure=HessianPattern_tri, opts=opts)
X_MPEC = as.matrix(ipopt_output$solution)
objective_MPEC = ipopt_output$objective
status_MPEC = ipopt_output$status

getwd()

# Save to file
#-------------------------------------------------------------------------------

dataset_name <- sprintf("1_results_subsample/resultsr_%s_%s.RData", (kk + 100*(i_group-1)), reps)
save(file = dataset_name, data, share, price, beta2sls, delta0, x0, X_MPEC, objective_MPEC, status_MPEC, K, K_A, FE, v_save, Y_save, nn)

}
}
}



